

library(tidyverse)
library(ggpattern)

# Use the replication folder as working directory so the relative file names
# below resolve.
# NOTE(review): "~replication" has no slash after the tilde - confirm this is
# the intended path (possibly "~/replication").
setwd("~replication")


# Background data for the sample of candidates; the code below expects this
# file to provide `df_background`.
load("data_genderedcost_background.rdata")

# Keep only completed answers (SurveyStatus == 2) handed in before the
# deadline; 2021-12-20 21:49:52 was the last response within the time frame.
df_background <- filter(
  df_background,
  SurveyStatus == 2,
  SurveyEndTime <= "2021-12-20 21:49:52"
)


# Aggregated background variables for the entire candidate pool; the code
# below expects this file to provide `rep_list`.
load("data_rep_list.Rdata")
# Copy every element of rep_list into the global environment under its own name
list2env(rep_list, envir = .GlobalEnv)

### FIGURE D1 - AGE
# Age distribution in the sample.
# Overall: share of respondents at each age, labelled "Full Sample".
sample_shares <- count(df_background, age) %>%
  mutate(
    prop_age = n / sum(n),
    sex = factor("Full Sample")
  )

# By sex: shares are computed within each sex group. Rows with sex == "Man"
# are labelled "Men in sample"; every other value becomes "Women in sample".
subset_shares <- count(df_background, sex, age) %>%
  group_by(sex) %>%
  mutate(
    prop_age = n / sum(n),
    sex = factor(ifelse(sex == "Man", "Men in sample", "Women in sample"))
  )

# Stack the overall rows on top of the by-sex rows
shares <- bind_rows(sample_shares, subset_shares)

## Age distribution in sample
# One facet per value of `sex` in `shares`, three panels in a row, bars in
# grey scale. No colour aesthetic is mapped here, so scale_color_grey() has
# nothing to act on; it is kept as in the original.
age_sample <- ggplot(shares, aes(x = age, y = prop_age, fill = sex)) +
  geom_col(position = position_dodge2(width = 0.2)) +
  facet_wrap(~sex, ncol = 3) +
  scale_x_continuous(
    breaks = seq(20, 80, by = 10),
    labels = seq(20, 80, by = 10)
  ) +
  scale_fill_grey(name = "") +
  scale_color_grey(name = "") +
  labs(title = "Sample", x = "Age of candidate", y = "Share") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(hjust = 0, face = "bold")
  )

## Age distribution in the entire candidate pool
# `age` is not created in this script; it is expected to be one of the
# rep_list elements exported to the global environment. The explicit factor
# levels fix the left-to-right order of the three facets.
age_pool <- ggplot(
  data = mutate(
    age,
    sex = factor(
      sex,
      levels = c("Full Population", "All Men Candidates", "All Women Candidates")
    )
  ),
  mapping = aes(x = age, y = prop_age, fill = sex)
) +
  geom_col(position = position_dodge2(width = 0.2)) +
  facet_wrap(~sex, ncol = 3) +
  scale_x_continuous(
    breaks = seq(20, 80, by = 10),
    labels = seq(20, 80, by = 10)
  ) +
  scale_fill_grey(name = "") +
  labs(
    title = "All municipal candidates 2021",
    x = "Age of candidate",
    y = "Share"
  ) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(hjust = 0, face = "bold")
  )

# Stack the sample panel on top of the candidate-pool panel (Figure D1).
# `age_sample / age_pool` only works when patchwork is attached, because the
# `/` operator for ggplot objects is provided by that package, and this script
# never attaches it. Calling patchwork through its namespace builds the same
# one-column layout without depending on what the session happens to have
# loaded.
figure_d1 <- patchwork::wrap_plots(age_sample, age_pool, ncol = 1)
figure_d1

# Hand the combined plot to ggsave() explicitly instead of relying on
# last_plot() pointing at it.
ggsave("figureD1.pdf", plot = figure_d1, height = 5.6, width = 6)


### FIGURE D2: Marriage
# Marital-status shares in the sample. The column is renamed to `civst` up
# front so the sample rows can be stacked with the pool table `married` below.
# Overall shares, labelled "All":
sample_shares <- df_background %>%
  rename(civst = marital_status) %>%
  count(civst) %>%
  mutate(
    prop_civst = n / sum(n),
    sex = factor("All")
  )

# Shares within each sex; "Man" becomes "Men", any other value "Women"
subset_shares <- df_background %>%
  rename(civst = marital_status) %>%
  count(sex, civst) %>%
  group_by(sex) %>%
  mutate(
    prop_civst = n / sum(n),
    sex = factor(ifelse(sex == "Man", "Men", "Women"))
  )

# Tag all sample rows so they can be told apart from the pool rows
shares <- mutate(bind_rows(sample_shares, subset_shares), sample = "Sample")

# Marriage propensity in the entire candidate pool: tag the rows and rename
# the "Full Sample" group to "All" so it lines up with the sample rows.
married <- mutate(
  married,
  sample = "All Municipal Candidates 2021",
  sex = ifelse(sex == "Full Sample", "All", sex)
)

# Sample rows first, pool rows second
married_all <- bind_rows(shares, married)

# Figure D2: share of married candidates by sex, sample vs. candidate pool.
# Fill distinguishes sex; the bar outline colour distinguishes `sample`.
ggplot(
  data = filter(married_all, civst == "Married"),
  mapping = aes(x = sex, y = prop_civst, fill = sex, color = sample)
) +
  geom_col(position = position_dodge2(width = 0.2), linewidth = 0.7) +
  # Value labels above the bars. `sample` is mapped to hjust in this layer;
  # presumably this keeps the labels grouped per sample so position_dodge()
  # separates them after colour is fixed to black - TODO confirm.
  geom_text(
    aes(label = round(prop_civst, digits = 2), hjust = sample),
    position = position_dodge(width = 0.9),
    color = "black",
    size = 3,
    vjust = -0.5
  ) +
  facet_wrap(~civst, ncol = 4) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_fill_manual(
    name = "",
    values = c("grey30", "grey60", "grey80"),
    guide = "none"
  ) +
  scale_color_manual(values = c("black", "white")) +
  # No pattern_type aesthetic is mapped in this plot; scale kept as in the
  # original.
  scale_pattern_type_discrete(choices = gridpattern::names_polygon_tiling) +
  labs(x = "", y = "Share") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave("figureD2.pdf", width = 4, height = 3)


### FIGURE D3: Education
# Display order of the education categories, shared by the sample table and
# the combined table so both use exactly the same levels.
edu_levels <- c(
  "Primary School", "High School", "Vocational",
  "Short, higher\n(>3 years)",
  "Professional or\nbachelor degree",
  "Long, higher", "Other"
)

# Educational distribution in the sample: overall shares, labelled "All"
sample_shares <- df_background %>%
  count(education) %>%
  mutate(prop_edu = prop.table(n)) %>%
  mutate(sex = factor("All"))

# Shares within each sex; "Man" becomes "Men", any other value "Women"
subset_shares <- df_background %>%
  count(sex, education) %>%
  group_by(sex) %>%
  mutate(prop_edu = prop.table(n)) %>%
  mutate(sex = factor(ifelse(sex == "Man", "Men", "Women")))

# Shorten two long category labels (with a line break) and tag the sample rows
shares <- bind_rows(sample_shares, subset_shares) %>%
  mutate(education = as.character(education)) %>%
  mutate(education = case_when(
    education == "Short, higher education (>3 years)" ~ "Short, higher\n(>3 years)",
    education == "Bachelor/professional degree" ~ "Professional or\nbachelor degree",
    TRUE ~ education
  )) %>%
  mutate(education = factor(education, levels = edu_levels)) %>%
  mutate(sample = "Sample")

# Educational distribution in the entire pool: replace the "\r\n" line breaks
# in two labels with "\n" so they match the sample labels, and rename the
# "Full Population" group to "All".
education <- education %>%
  mutate(education = case_when(
    education == "Professional or\r\nbachelor degree" ~ "Professional or\nbachelor degree",
    education == "Short, higher\r\n(>3 years)" ~ "Short, higher\n(>3 years)",
    TRUE ~ education
  )) %>%
  mutate(sex = ifelse(sex == "Full Population", "All", sex))

# The marriage and electoral-performance pool tables are tagged with
# sample = "All Municipal Candidates 2021" before stacking, but this table was
# not. Without that label the pool rows would carry sample = NA in the colour
# and dodge mapping of Figure D3. Add it only when the loaded table does not
# already provide a `sample` column, so an existing label is never overwritten.
if (!"sample" %in% names(education)) {
  education <- mutate(education, sample = "All Municipal Candidates 2021")
}

# merge sample and pool rows and restore the category order
edu_df <- bind_rows(shares, education) %>%
  mutate(education = factor(education, levels = edu_levels))

# Figure D3: education shares by sex, one facet per education level.
# Fill distinguishes sex; the bar outline colour distinguishes `sample`.
ggplot(
  data = edu_df,
  mapping = aes(x = sex, y = prop_edu, fill = sex, color = sample)
) +
  geom_col(position = position_dodge2(width = 0.2), linewidth = 0.7) +
  # Value labels above the bars; `sample` is mapped to hjust in this layer
  # (presumably to keep labels grouped per sample for dodging - TODO confirm).
  geom_text(
    aes(label = round(prop_edu, digits = 2), hjust = sample),
    position = position_dodge(width = 0.9),
    color = "black",
    size = 3,
    vjust = -0.5
  ) +
  facet_wrap(~education, ncol = 4) +
  coord_cartesian(ylim = c(0, 0.4)) +
  scale_fill_manual(
    name = "",
    values = c("grey30", "grey60", "grey80"),
    guide = "none"
  ) +
  scale_color_manual(values = c("black", "white")) +
  labs(x = "", y = "Share") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave("figureD3.pdf", width = 10, height = 6)


### FIGURE D4: Electoral performance
# Electoral performance in the sample: overall shares, labelled "All"
sample_shares <- count(df_background, electoral_performance) %>%
  mutate(
    prop_exp = n / sum(n),
    sex = factor("All")
  )

# Shares within each sex; "Man" becomes "Men", any other value "Women"
subset_shares <- count(df_background, sex, electoral_performance) %>%
  group_by(sex) %>%
  mutate(
    prop_exp = n / sum(n),
    sex = factor(ifelse(sex == "Man", "Men", "Women"))
  )

# Rename the category column to `pol_exp` so it can be stacked with the pool
# table below, and tag the sample rows.
shares <- bind_rows(sample_shares, subset_shares) %>%
  rename(pol_exp = electoral_performance) %>%
  mutate(sample = "Sample")

# Electoral performance in the entire candidate pool: tag the rows and rename
# the "Full Sample" group to "All".
electoralperformance <- mutate(
  electoralperformance,
  sample = "All Municipal Candidates 2021",
  sex = ifelse(sex == "Full Sample", "All", sex)
)

# Stack both sources and put the long category labels on two lines. The
# misspelled variant ("previosly") is mapped to the same label as the
# correctly spelled one.
elec_perf_all <- bind_rows(shares, electoralperformance) %>%
  mutate(
    pol_exp = case_when(
      pol_exp == "Not elected, but elected previously" ~
        "Not elected, \nElected previously",
      pol_exp %in% c(
        "Not elected, not elected previously",
        "Not elected, not elected previosly"
      ) ~ "Not elected,\nNot elected previously",
      TRUE ~ pol_exp
    )
  )

# Figure D4: electoral-performance shares by sex, one facet per category.
# Fill distinguishes sex; the bar outline colour distinguishes `sample`.
ggplot(
  data = elec_perf_all,
  mapping = aes(x = sex, y = prop_exp, fill = sex, color = sample)
) +
  geom_col(position = position_dodge2(width = 0.2), linewidth = 0.7) +
  # Value labels above the bars; `sample` is mapped to hjust in this layer
  # (presumably to keep labels grouped per sample for dodging - TODO confirm).
  geom_text(
    aes(label = round(prop_exp, digits = 2), hjust = sample),
    position = position_dodge(width = 0.9),
    color = "black",
    size = 3,
    vjust = -0.5
  ) +
  facet_wrap(~pol_exp, ncol = 4) +
  coord_cartesian(ylim = c(0, 0.75)) +
  scale_fill_manual(
    name = "",
    values = c("grey30", "grey60", "grey80"),
    guide = "none"
  ) +
  scale_color_manual(values = c("black", "white")) +
  labs(x = "", y = "Share") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

ggsave("figureD4.pdf", width = 10, height = 3)
